/**************************************************************;

This file cleans the institution names from the PRC licensure data in order to create an institution-year level dataset of pass rates;
*It also merges on the geographic data to determine where each institution is located;

**************************************************************/
*Reset the session: close any open log and clear programs, macros and data
capture log close
capture program drop _all
capture macro drop _all
set matsize 800
drop _all
set more off
*From here on every command and every star comment ends with a semicolon
#delimit ;

*Set paths;
*root is the project folder and must end with a slash;
*It is left empty so that all paths resolve relative to the current working directory. Fill it in before running from elsewhere;
*The previous line had no value and no terminator, so it swallowed the dofile definition below;
global root "";
global dofile "${root}Do Files/";
global rawdata "${root}Stata/";
global data "${root}Stata/";
*Trailing slash added so that file names appended to geodata land inside the geo folder, like the other path globals;
global geodata "${root}geo/";
**************************************************************;
*Load the school location list from CSV and store an unmodified Stata copy;
insheet using "${geodata}School-List-Final_forstata.csv", names;
save "${data}School-List-Final_forstata.dta", replace;

*Inspect the province spellings, then convert them to upper case;
tabulate province;
replace province = upper(province);

*Count the distinct non-missing provinces by flagging one row per province;
egen one_per_province = tag(province);
count if one_per_province == 1;

tabulate province;

*Retain only the location fields and remember the school name as it arrived;
keep school municipality province;
generate school_orig = school;

*Run the institution name cleaning do-file on the list in memory;
do "${root}0.0 Clean Institution Names.do";

*Manual province and municipality corrections, keyed on the cleaned school name or on school_orig where shown;
*Order matters: rules that key on muni or on province act on whatever the rules above them have already set;
replace province="LANAO DEL NORTE" if school=="ADVENTIST MEDICAL CENTER COLLEGE- ILLIGAN INC";
replace muni="Iligan City" if school=="ADVENTIST MEDICAL CENTER COLLEGE- ILLIGAN INC";
replace province="CAVITE" if school_orig=="PHILIPPINE UNION COLLEGE-CALOOCAN"|school_orig=="PHILIPPINE UNION COLLEGE";
replace muni="Silang" if school_orig=="PHILIPPINE UNION COLLEGE-CALOOCAN"|school_orig=="PHILIPPINE UNION COLLEGE";
replace province="NUEVA ECIJA" if school=="ARAULLO UNIVERSITY";
replace muni="Cabanatuan City" if school=="ARAULLO UNIVERSITY";
*Was THIRD DISTRICT. A later blanket rule in this file sets SECOND DISTRICT for every row with muni Quezon City, so the final value is unchanged and the two rules no longer contradict each other;
replace province="SECOND DISTRICT" if school=="ASIAN COLLEGE- QUEZON CITY";
replace muni="Quezon City" if school=="ASIAN COLLEGE- QUEZON CITY";
replace muni="Abucay" if school=="BATAAN PENNISULA STATE UNIVERSITY- ABUCAY CAMPUS";
replace muni="Pontevedra" if school=="CAPIZ STATE UNIVERSITY - PONTEVEDRA CAMPUS";
replace province="TARLAC" if school=="CARTHEL SCIENCE EDUCATIONAL FOUNDATION,INC. (OLRA)";
replace muni="San Manuel" if school=="CARTHEL SCIENCE EDUCATIONAL FOUNDATION,INC. (OLRA)";
replace muni="Cavite City" if school=="CAVITE STATE UNIVERSITY-CAVITE CITY";
replace province="FIRST DISTRICT" if school=="CENTRO ESCOLAR UNIVERSITY - MANILA";
replace province="ANTIQUE" if school=="COLEGIO DE LA IMMACULADA CONCEPCION - ANTIQUE";
replace muni="" if school=="COLEGIO DE LA IMMACULADA CONCEPCION - ANTIQUE";
replace province="NUEVA ECIJA" if school=="COLLEGE OF THE IMMACULATE CONCEPTION - CABANATUAN";
replace muni="Cabanatuan City" if school=="COLLEGE OF THE IMMACULATE CONCEPTION - CABANATUAN";
replace province="CAVITE" if school=="DE LA SALLE UNIVERSITY - DASMARINAS";
replace muni="Dasmarinas" if school=="DE LA SALLE UNIVERSITY - DASMARINAS";
replace province="OCCIDENTAL MINDORO" if school=="DIVINE WORD COLLEGE OF SAN JOSE";
replace muni="San Jose" if school=="DIVINE WORD COLLEGE OF SAN JOSE";
replace province="ALBAY" if school=="DIVINE WORD COLLEGE - LEGAZPI";
replace muni="Legazpi City" if school=="DIVINE WORD COLLEGE - LEGAZPI";
replace province="SECOND DISTRICT" if school=="DOMINICAN COLLEGE - SAN JUAN";
replace muni="San Juan" if school=="DOMINICAN COLLEGE - SAN JUAN";
replace province="QUEZON" if school=="EASTERN TAYABAS COLLEGE";
replace muni="Lopez" if school=="EASTERN TAYABAS COLLEGE";
replace province="BATAAN" if school=="EASTWOODS PROFESSIONAL COLLEGE OF SCI & TECH - BALANGA,BATAAN";
replace province="FIRST DISTRICT" if school=="EMILIO AGUINALDO COLLEGE - MANILA";
replace muni="Paco" if school=="EMILIO AGUINALDO COLLEGE - MANILA";
replace province="FIRST DISTRICT" if school=="FAR EASTERN UNIVERSITY - MANILA";
replace muni="Sampaloc" if school=="FAR EASTERN UNIVERSITY - MANILA";
replace province="NEGROS ORIENTAL" if school=="FOUNDATION UNIVERSITY";
replace province="ORIENTAL MINDORO" if school=="GRACE MISSION H.S.H.";
replace muni="Socorro" if school=="GRACE MISSION H.S.H.";
replace province="PALAWAN" if school=="HOLY TRINITY UNIVERSITY";
replace muni="PUERTO PRINCESA" if school=="HOLY TRINITY UNIVERSITY";
replace province="ALBAY" if school=="IMMACULATE CONCEPCION HOSPITAL SCHOOL OF NURSING";
replace muni="Daraga" if school=="IMMACULATE CONCEPCION HOSPITAL SCHOOL OF NURSING";
*The province set on the next line is superseded further down by the rule that maps muni Ozamis City to MISAMIS OCCIDENTAL;
replace province="MISAMIS ORIENTAL" if school=="LA SALLE UNIVERSITY (FOR.IMMA.CONCEPCION-LA SALLE)";
replace muni="Ozamis City" if school=="LA SALLE UNIVERSITY (FOR.IMMA.CONCEPCION-LA SALLE)";
replace province="FIRST DISTRICT" if school=="LYCEUM OF THE PHILIPPINES UNIVERSITY- MANILA";
replace muni="Intramuros" if school=="LYCEUM OF THE PHILIPPINES UNIVERSITY- MANILA";
replace province="SECOND DISTRICT" if school=="M. L. BUENCONSEJO SCHOOL";
replace muni="Pasig" if school=="M. L. BUENCONSEJO SCHOOL";
*Blanket rule: harmonise the province label for every row, not just one school;
replace province="QUEZON" if province=="QUEZON PROVINCE";
replace province="FIRST DISTRICT" if school=="MAPUA UNIVERSITY- MANILA";
replace province="ZAMBOANGA DEL SUR" if school=="MEDINA COLLEGE - PAGADIAN (ST.JOHN GEN. HOSP. & COLL.)";
replace muni="Pagadian City" if school=="MEDINA COLLEGE - PAGADIAN (ST.JOHN GEN. HOSP. & COLL.)";
replace province="MISAMIS OCCIDENTAL" if school=="MEDINA COLLEGE- OZAMIZ CITY";
replace muni="Ozamis City" if school=="MEDINA COLLEGE- OZAMIZ CITY";
replace province="FIRST DISTRICT" if school=="METROPOLITAN MEDICAL CENTER OF COLLEGE OF ARTS, SCIE & TECH";
replace muni="Sta. Cruz" if school=="METROPOLITAN MEDICAL CENTER OF COLLEGE OF ARTS, SCIE & TECH";
replace province="" if school=="MINDANAO";
replace muni="" if school=="MINDANAO";
*Blanket rules keyed on municipality: they apply to every row whose muni has the listed value at this point;
replace province="ZAMBOANGA SIBUGAY" if muni=="Buug";
replace province="MISAMIS OCCIDENTAL" if muni=="Ozamis City"|muni=="Oroquieta City";
replace province="MISAMIS ORIENTAL" if school=="MISAMIS ORIENTAL INSTITUTE OF SCIENCE & TECHNOLOGY";
replace muni="Cogon" if school=="MISAMIS ORIENTAL INSTITUTE OF SCIENCE & TECHNOLOGY";
replace province="BUKIDNON" if school=="MOUNTAIN VIEW COLLEGE";
replace muni="Valencia City" if school=="MOUNTAIN VIEW COLLEGE";
replace province="PANGASINAN" if school=="NAZARENUS SCHOOL OF NURSING";
replace muni="Calasiao" if school=="NAZARENUS SCHOOL OF NURSING";
*Blanket rule keyed on municipality;
replace province="NEGROS ORIENTAL" if muni=="Siaton"|muni=="Dumaguete City"|muni=="Sibulan";
replace province="MISAMIS ORIENTAL" if school=="NORTHERN MINDANAO POLYTECHNIC SCHOOL";
replace muni="Cagayan de Oro" if school=="NORTHERN MINDANAO POLYTECHNIC SCHOOL";
replace province="THIRD DISTRICT" if school=="OUR LADY OF FATIMA UNIVERSITY- VALENZUELA CITY";
replace muni="Valenzuela" if school=="OUR LADY OF FATIMA UNIVERSITY- VALENZUELA CITY";
replace province="CAMARINES NORTE" if school=="OUR LADY OF LOURDES COLLEGE FOUNDATION";
replace muni="Daet" if school=="OUR LADY OF LOURDES COLLEGE FOUNDATION";
replace province="" if school=="OUR LADY OF REMEDIES ACADEMY COLLEGE FDTN SCHOOL OF MIDWIFERY";
replace muni="" if school=="OUR LADY OF REMEDIES ACADEMY COLLEGE FDTN SCHOOL OF MIDWIFERY";
replace province="FOURTH DISTRICT" if school=="PHILIPPINE AIRLINES AVIATION SCHOOL";
replace muni="Pasay" if school=="PHILIPPINE AIRLINES AVIATION SCHOOL";
replace province="FIRST DISTRICT" if school=="PHILIPPINE CHRISTIAN UNIVERSITY- MANILA";
replace muni="Malate" if school=="PHILIPPINE CHRISTIAN UNIVERSITY- MANILA";
replace province="FIRST DISTRICT" if school=="PHILIPPINE MERCHANT MARINE SCHOOL - MANILA";
replace muni="Sta. Cruz" if school=="PHILIPPINE MERCHANT MARINE SCHOOL - MANILA";
replace province="QUEZON" if school=="POLYTECHNIC UNIVERSITY OF THE PHILIPPINES- LOPEZ";
replace muni="Lopez" if school=="POLYTECHNIC UNIVERSITY OF THE PHILIPPINES- LOPEZ";
*This school is placed in Tacurong City, Sultan Kudarat;
*A second pair of rules for the very same school name used to follow and overwrote these values with SECOND DISTRICT and Quezon City. It made this pair dead code and has been removed;
*NOTE(review): if that second pair was meant for a different school, add it back under the correct school name;
replace province="SULTAN KUDARAT" if school=="QUEZON COLLEGE OF SOUTHERN PHILLIPINES";
replace muni="Tacurong City" if school=="QUEZON COLLEGE OF SOUTHERN PHILLIPINES";
replace province="ZAMBALES" if school=="PRESIDENT RAMON MAGSAYSAY STATE UNIVERSITY-IBA";
replace muni="Iba" if school=="PRESIDENT RAMON MAGSAYSAY STATE UNIVERSITY-IBA";
replace province="LEYTE" if school=="REMEDIOS T. ROMUALDEZ MEDICAL FOUNDATION";
replace muni="Tacloban City" if school=="REMEDIOS T. ROMUALDEZ MEDICAL FOUNDATION";
replace province="QUEZON" if school=="SAINT ANNE COLLEGE OF THE PACIFIC";
replace muni="Lucena" if school=="SAINT ANNE COLLEGE OF THE PACIFIC";
replace province="CAPIZ" if school=="SAINT ANTHONY COLLEGE - ROXAS CITY";
replace muni="Roxas City" if school=="SAINT ANTHONY COLLEGE - ROXAS CITY";
replace province="ANTIQUE" if school=="SAINT ANTHONY'S COLLEGE- ANTIQUE";
replace muni="Buenavista" if school=="SAINT ANTHONY'S COLLEGE- ANTIQUE";
replace province="ANTIQUE" if school=="SAINT GABRIEL COLLEGE - ANTIQUE";
replace muni="Sebaste" if school=="SAINT GABRIEL COLLEGE - ANTIQUE";
replace province="AKLAN" if school=="SAINT GABRIEL COLLEGE- KALIBO";
replace muni="Kalibo" if school=="SAINT GABRIEL COLLEGE- KALIBO";
replace province="SECOND DISTRICT" if school=="SAINT JOSEPH'S COLLEGE - QUEZON CITY";
replace muni="Quezon City" if school=="SAINT JOSEPH'S COLLEGE - QUEZON CITY";
replace province="FIRST DISTRICT" if school=="SAINT JUDE COLLEGE- MANILA";
replace muni="Sampaloc" if school=="SAINT JUDE COLLEGE- MANILA";
replace province="SURIGAO DEL NORTE" if school=="SAINT JUDE THADDEUS INSTITUTE OF TECHNOLOGY";
replace muni="Surigao City" if school=="SAINT JUDE THADDEUS INSTITUTE OF TECHNOLOGY";
replace province="BENGUET" if school=="SAINT LOUIS UNIVERSITY";
replace muni="Baguio City" if school=="SAINT LOUIS UNIVERSITY";
replace province="FIRST DISTRICT" if school=="SAINT PAUL COLLEGE SAINT RITA COLLEGE";
replace muni="Quiapo" if school=="SAINT PAUL COLLEGE SAINT RITA COLLEGE";
replace province="ILOCOS SUR" if school=="SAINT PAUL COLLEGE- ILOCOS SUR";
replace muni="Bantay" if school=="SAINT PAUL COLLEGE- ILOCOS SUR";
replace province="SURIGAO DEL NORTE" if school=="SAINT PAUL UNIVERSITY- SURIGAO";
replace muni="Surigao City" if school=="SAINT PAUL UNIVERSITY- SURIGAO";
replace province="CAGAYAN" if school=="SAINT PAUL UNIVERSITY- TUGUEGARAO";
replace muni="Tuguegarao City" if school=="SAINT PAUL UNIVERSITY- TUGUEGARAO";
replace province="" if school=="SAINT PETER COLLEGE";
replace muni="" if school=="SAINT PETER COLLEGE";
*This pair was previously written out twice in a row. The exact duplicate has been removed;
replace province="FIRST DISTRICT" if school=="SAINT RITA'S COLLEGE- MANILA";
replace muni="Quiapo" if school=="SAINT RITA'S COLLEGE- MANILA";
*Rename first so that the two location rules that follow can key on the new name;
replace school="SAINT THERESA COLLEGE-TANDAG" if school_orig=="SAINT THERESAS COLLEGE";
replace province="SURIGAO DEL SUR" if school=="SAINT THERESA COLLEGE-TANDAG";
replace muni="Tandag" if school=="SAINT THERESA COLLEGE-TANDAG";
replace province="DAVAO DEL SUR" if school=="SAN PEDRO COLLEGE- DAVAO CITY";
replace muni="Davao City" if school=="SAN PEDRO COLLEGE- DAVAO CITY";
replace province="FIRST DISTRICT" if school=="SAN SEBASTIAN COLLEGE- RECOLETOS";
replace muni="Quiapo" if school=="SAN SEBASTIAN COLLEGE- RECOLETOS";
replace province="SECOND DISTRICT" if school=="SIENNA COLLEGE-QUEZON CITY";
replace muni="Quezon City" if school=="SIENNA COLLEGE-QUEZON CITY";
replace province="SOUTH COTABATO" if school=="SOUTH COTABATO SCHOOL OF MIDWIFERY";
replace province="MAGUINDANAO" if school=="SOUTHERN PHILIPPINES COLLEGE OF SCIENCES & HEALTH EDUCATION";
replace muni="Shariff Kabunsuan" if school=="SOUTHERN PHILIPPINES COLLEGE OF SCIENCES & HEALTH EDUCATION";
replace province="LANAO DEL NORTE" if school=="ST MICHAEL'S COLLEGE- ILLIGAN CITY";
*Municipality spelled Iligan City, matching the value used for the other Iligan school earlier in this file. The school name literal is left as it is because it must match the data;
replace muni="Iligan City" if school=="ST MICHAEL'S COLLEGE- ILLIGAN CITY";
replace province="MISAMIS ORIENTAL" if school=="ST. MARY'S ACADEMY OF CARMEN";
replace muni="CAGAYAN DE ORO" if school=="ST. MARY'S ACADEMY OF CARMEN";
replace province="BULACAN" if school=="STA. CLARA DE MONTEFALCO COLLEGE, INC";
replace muni="Meycuayan" if school=="STA. CLARA DE MONTEFALCO COLLEGE, INC";
replace province="FOURTH DISTRICT" if school=="STI COLLEGE- MANILA- LAS PINAS";
replace muni="Las Pinas" if school=="STI COLLEGE- MANILA- LAS PINAS";
replace province="CAVITE" if school=="STI COLLEGE- SOUTHERN LUZON- CARMONA";
replace muni="Carmona" if school=="STI COLLEGE- SOUTHERN LUZON- CARMONA";
replace province="QUEZON" if school=="STI COLLEGE- SOUTHERN LUZON- LUCENA";
replace muni="Lucena City" if school=="STI COLLEGE- SOUTHERN LUZON- LUCENA";
replace province="" if school=="STI COLLEGES OF LUZON, INC";
replace muni="" if school=="STI COLLEGES OF LUZON, INC";
replace province="BATAAN" if school=="TOMAS DEL ROSARIO COLLEGE";
replace muni="Balanga City" if school=="TOMAS DEL ROSARIO COLLEGE";
replace province="SECOND DISTRICT" if school=="TRINITY-COLLEGE - QUEZON CITY";
replace muni="Quezon City" if school=="TRINITY-COLLEGE - QUEZON CITY";
replace province="CAMARINES SUR" if school=="UNIVERSIDAD DE STA ISABEL";
replace muni="Naga City" if school=="UNIVERSIDAD DE STA ISABEL";
replace province="NORTHERN SAMAR" if school=="UNIVERSITY OF EASTERN PHILIPPINES- CATARMAN";
replace muni="Catarman" if school=="UNIVERSITY OF EASTERN PHILIPPINES- CATARMAN";
replace province="ISABELA" if school=="UNIVERSITY OF LA SALETTE- SANTIAGO";
replace muni="Santiago City" if school=="UNIVERSITY OF LA SALETTE- SANTIAGO";
replace province="" if school=="UNIVERSITY OF NORTHWESTERN PHILIPPINES";
replace muni="" if school=="UNIVERSITY OF NORTHWESTERN PHILIPPINES";
replace province="MISAMIS ORIENTAL" if school=="UNIVERSITY OF SCIENCE AND TECHNOLOGY OF SOUTHERN PHILIPPINES";
replace muni="Cagayan De Oro City" if school=="UNIVERSITY OF SCIENCE AND TECHNOLOGY OF SOUTHERN PHILIPPINES";
replace province="COTABATO" if school=="UNIVERSITY OF SOUTHERN MINDANAO- KABACAN";
replace muni="Kabacan" if school=="UNIVERSITY OF SOUTHERN MINDANAO- KABACAN";
replace province="ILOILO" if school=="UNIVERSITY OF THE PHIL.-VISAYAS";
*Blanket rule: every row whose muni is Quezon City at this point gets SECOND DISTRICT, overriding any province set above;
replace province="SECOND DISTRICT" if muni=="Quezon City";
replace province="FIRST DISTRICT" if school=="UNIVERSITY OF THE PHILIPPINES- MANILA";
replace muni="Ermita" if school=="UNIVERSITY OF THE PHILIPPINES- MANILA";
replace province="PAMPANGA" if school=="VIRGEN DE LOS REMEDIOS MEDICAL CENTER SCHOOL";
replace muni="San Fernando City" if school=="VIRGEN DE LOS REMEDIOS MEDICAL CENTER SCHOOL";
replace province="LEYTE" if school=="VISAYAS STATE UNIVERSITY- BAYBAY";
replace muni="Baybay City" if school=="VISAYAS STATE UNIVERSITY- BAYBAY";
replace province="LANAO DEL SUR" if school=="WESTERN MINDANAO STATE UNIVERSITY-MARAWI CITY";
replace muni="Marawi City" if school=="WESTERN MINDANAO STATE UNIVERSITY-MARAWI CITY";
replace province="" if school=="SAINT MARY'S";
*The province rule used a school name ending in a stray closing parenthesis while the muni rule did not, so at most one of the two could ever match;
*Both spellings are accepted by both rules now, because the exact cleaned name is not visible from this file;
replace province="NEGROS OCCIDENTAL" if inlist(school,"NEGROS STATE COLLEGE OF AGRICULTURE","NEGROS STATE COLLEGE OF AGRICULTURE)");
replace muni="Kabankalan" if inlist(school,"NEGROS STATE COLLEGE OF AGRICULTURE","NEGROS STATE COLLEGE OF AGRICULTURE)");
replace province="ISABELA" if school=="OUR LADY OF THE PILLAR'S COLLEGE";
replace muni="San Juan" if school=="SAINT MARY'S COLLEGE OF SAN JUAN";
replace province="SECOND DISTRICT" if school=="SAINT MARY'S COLLEGE OF SAN JUAN";

#delimit ;
*Reduce the list to one row per province and school pair;
*egen tag leaves rows with a missing province or school untagged, so those rows are dropped here as well;
egen first_of_pair = tag(province school);
drop if first_of_pair != 1;
keep province school;

*Store the school to province lookup used by the merge later in this file;
save "${data}School-List-Final_forstata_clean.dta", replace;
**************************************************************;
*Read in dataset;
#delimit ;
use "${rawdata}qual_appended.dta", clear;

*Create counter for number of schools;
gen counter=1;

*Run the school name cleaning do-file on the exam data in memory;
do "${root}2_clean_school_names.do";

*********************************************;
*Label the variables BEFORE saving. The save used to come first, so the labels were never stored in the cleaned file and were then discarded by the collapse below;
label variable school "School name";
label variable examinees "Number taking exam";
label variable passed "Number passing exam";
label variable cond "Conditional";
label variable failed "Number failing exam";
label variable month "Month of Exam";
label variable year "Year of Exam";
label variable school_code "School Code";
label variable examinees_ft "Number taking exam, First Time";
label variable passed_ft "Number passing exam, First Time";
label variable cond_ft "Conditional, First Time";
label variable failed_ft "Number failing exam, First Time";
label variable examinees_rep "Number taking exam, Repeaters";
label variable passed_rep "Number passing exam, Repeaters";
label variable cond_rep "Conditional, Repeaters";
label variable failed_rep "Number failing exam, Repeaters";

*SAVE CLEANED DATASET;
save "${data}qual_appended_clean.dta", replace;

*Sum the counts to one row per school, month and year. Variables that are not listed, such as counter and school_code, are dropped by collapse;
collapse (sum) examine* cond* failed* passed*, by(month year school);

*********************************************;
*Merge with geographic data: many exam rows per school in memory, one location row per school in the lookup;
*m:1 is the documented spelling of this merge type. The lookup must hold each school only once or the merge stops with an error;
merge m:1 school using "${data}School-List-Final_forstata_clean.dta";

*Add geography for non-merges;
replace province="FIRST DISTRICT" if school=="ARAULLO HIGH SCHOOL";

*Keep only those with province assigned;
*Matched rows are kept as before. Unmatched exam rows are now also kept when a province was assigned by hand above. Keeping only _merge 3 threw that manual assignment away;
*Lookup rows without any exam data (_merge 2) are still dropped;
keep if _merge==3 | (_merge==1 & province!="");
drop _merge;

save "${data}qual_appended_geo_temp.dta", replace;

*********************************************;
*CLEAN VALUES OF DATA;
#delimit ;
use "${data}qual_appended_geo_temp.dta", clear;

*Sum over exam months to one row per school, province and year, then number the schools;
collapse (sum) examine* cond* failed* passed*, by(year school province);
egen id=group(school);

*Every comparison below looks at the previous and next ROW of the same school, so the data must be in school and year order;
*NOTE(review): neighbouring rows are not necessarily consecutive calendar years if a school has gaps. Confirm this is acceptable;
sort school year;

*Clean outliers of examinees;
*The three examinee flags are only created in this section. Nothing here changes examinees itself;
*Current year bigger than three times surrounding years;
gen tag=1 if (examinees>3*examinees[_n+1]&examinees>3*examinees[_n-1])&(school[_n]==school[_n+1])&(school[_n]==school[_n-1]);
*Current year smaller than three times surrounding years;
gen tag2=1 if ((examinees<(1/3)*examinees[_n+1])&(examinees<(1/3)*examinees[_n-1]))&(school[_n]==school[_n+1])&(school[_n]==school[_n-1]);

*First year of a school: only a following row exists, so compare against that row alone;
gen tag3=1 if ((examinees>3*examinees[_n+1])|(examinees<(1/3)*examinees[_n+1]))&(school[_n]==school[_n+1])&(school[_n]!=school[_n-1]);

*Clean outliers of passers;
*Current year bigger than three times surrounding years;
gen pag=1 if (passed>3*passed[_n+1]&passed>3*passed[_n-1])&(school[_n]==school[_n+1])&(school[_n]==school[_n-1]);
*Current year smaller than three times surrounding years;
gen pag2=1 if ((passed<(1/3)*passed[_n+1])&(passed<(1/3)*passed[_n-1]))&(school[_n]==school[_n+1])&(school[_n]==school[_n-1]);

*Replace with average of the two surrounding years;
*replace works through the rows in order, so reading passed[_n-1] directly picks up a value that was already smoothed when two neighbouring rows are both flagged;
*Average a frozen copy of the original counts instead. These are the same values the flags were computed from;
*Rows with zero passers are deliberately left alone, as before;
gen double passed_raw=passed;
replace passed=((passed_raw[_n+1]+passed_raw[_n-1])/2) if (pag==1|pag2==1)&passed!=0;
*Remove the helper so the saved file has the same variables as before;
drop passed_raw;

save "${data}qual_appended_geo.dta", replace;

